In [ ]:
# Dr. M. Baron, Statistical Machine Learning class, STAT-427/627
# DEEP LEARNING
# Import necessary libraries
! pip install pandas;
! pip install numpy;
! pip install scikit-learn;
! pip install matplotlib;
! pip install seaborn;
! pip install tensorflow;
! pip install ISLP;
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.neural_network import MLPRegressor, MLPClassifier
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from tensorflow import keras
from tensorflow.keras import layers
from ISLP import load_data
In [71]:
# Load the datasets
url = "https://vincentarelbundock.github.io/Rdatasets/csv/datasets/USArrests.csv"
USArrests = pd.read_csv(url, index_col=0)
# Check the first few rows of the dataset
print(USArrests.head())
Murder Assault UrbanPop Rape rownames Alabama 13.2 236 58 21.2 Alaska 10.0 263 48 44.5 Arizona 8.1 294 80 31.0 Arkansas 8.8 190 50 19.5 California 9.0 276 91 40.6
In [72]:
# Standardize data
X = StandardScaler().fit_transform(USArrests)
features = USArrests.columns # Get column names for the features
In [78]:
# Add state names as row labels (in this case, USArrests' index has state names)
state_names = USArrests.index
# Perform PCA
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)
# Plot the PCA components
plt.figure(figsize=(12, 9))
plt.scatter(X_pca[:, 0], X_pca[:, 1], alpha=0.5)
plt.xlabel("Principal Component 1")
plt.ylabel("Principal Component 2")
plt.title("Biplot with State Labels and Enhanced Arrows")
# Add labels for each state
for i, state in enumerate(state_names):
plt.text(X_pca[i, 0], X_pca[i, 1], state, ha='right', color='blue', fontsize=8)
# Plot arrows (loadings) for each feature
for i, feature in enumerate(features):
plt.arrow(0, 0,
pca.components_[0, i] * 2, # Adjust 2 to make arrows longer or shorter
pca.components_[1, i] * 2,
color='red',
width=0.02, # Thickness of the arrow
head_width=0.1) # Width of the arrowhead
plt.text(pca.components_[0, i] * 2.2, pca.components_[1, i] * 2.2,
feature, color='red', ha='center', va='center')
plt.grid()
plt.show()
In [79]:
from ISLP import load_data;
# Load the Auto dataset from package ISLP
Auto = load_data('Auto')
In [80]:
# Prepare features and target variables
X = Auto[['weight', 'acceleration', 'horsepower', 'cylinders']]
y = Auto['mpg']
# Split data into training and testing sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.5, random_state=1)
In [83]:
# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
In [89]:
# 1. Artificial Neural Network with no hidden layers (linear regression)
model_no_hidden = keras.Sequential([
layers.Input(shape=(4,)), # 4 input features
layers.Dense(1) # No hidden layers
])
model_no_hidden.compile(optimizer='adam', loss='mean_squared_error')
# Train the model
model_no_hidden.fit(X_train_scaled, y_train, epochs=100, verbose=0)
# Make predictions and calculate MSE
y_pred_no_hidden = model_no_hidden.predict(X_test_scaled)
mse_no_hidden = mean_squared_error(y_test, y_pred_no_hidden)
# Print MSE
print(f'MSE for no hidden layer ANN: {mse_no_hidden:.2f}')
# Function to draw ANN
def draw_ann(weights, layer_sizes, title):
fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111)
# Calculate positions for layers
layers_pos = [np.linspace(0, size - 1, size) for size in layer_sizes]
# Draw nodes for each layer
for i, layer_size in enumerate(layer_sizes):
for j in range(layer_size):
ax.scatter(i, layers_pos[i][j], s=200, color='lightblue', edgecolor='black', zorder=5)
# Draw arrows and display weights
for i in range(len(weights)):
weight_matrix = weights[i][0] # First element is the weight matrix
for j in range(weight_matrix.shape[1]): # Iterate through output nodes
for k in range(weight_matrix.shape[0]): # Iterate through input nodes
# Draw arrows from input to output nodes
ax.arrow(i, layers_pos[i][k], 1, (layers_pos[i + 1][j] - layers_pos[i][k]) * 0.2,
head_width=0.1, head_length=0.1, fc='k', ec='k', zorder=4)
# Display weight value as text
ax.text(i + 0.5, layers_pos[i][k] + (layers_pos[i + 1][j] - layers_pos[i][k]) * 0.1,
f'{weight_matrix[k, j]:.2f}', fontsize=12, ha='center', va='center')
ax.set_title(title)
ax.set_xticks(np.arange(len(layer_sizes)))
ax.set_xticklabels([f'Layer {i + 1}' for i in range(len(layer_sizes))])
ax.set_yticks([])
ax.set_ylim(-1, layer_sizes[-1] + 1)
plt.show()
# Draw the ANN with no hidden layers
weights_no_hidden = model_no_hidden.layers[0].get_weights()
draw_ann([weights_no_hidden], [4, 1], 'ANN with No Hidden Layers')
4/4 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step MSE for no hidden layer ANN: 530.30
In [91]:
# 2. ANN with 3 hidden nodes
model_3_hidden = keras.Sequential([
layers.Input(shape=(4,)), # 4 input features
layers.Dense(3, activation='relu'), # 3 hidden nodes
layers.Dense(1) # Output layer
])
model_3_hidden.compile(optimizer='adam', loss='mean_squared_error')
# Train the model
model_3_hidden.fit(X_train_scaled, y_train, epochs=100, verbose=0)
# Make predictions and calculate MSE
y_pred_3_hidden = model_3_hidden.predict(X_test_scaled)
mse_3_hidden = mean_squared_error(y_test, y_pred_3_hidden)
# Print MSE
print(f'MSE for 3 hidden nodes ANN: {mse_3_hidden:.2f}')
# Draw the ANN with 3 hidden nodes
weights_3_hidden = [
model_3_hidden.layers[0].get_weights(), # Weights from input to first hidden layer
model_3_hidden.layers[1].get_weights() # Weights from hidden to output layer
]
draw_ann(weights_3_hidden, [4, 3, 1], 'ANN with 3 Hidden Nodes')
4/4 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step MSE for 3 hidden nodes ANN: 315.71
In [93]:
# 3. Multilayer structure with 3 and 2 hidden nodes
model_3_2_hidden = keras.Sequential([
layers.Input(shape=(4,)),
layers.Dense(3, activation='relu'),
layers.Dense(2, activation='relu'), # 2 hidden nodes
layers.Dense(1)
])
model_3_2_hidden.compile(optimizer='adam', loss='mean_squared_error')
# Train the model
model_3_2_hidden.fit(X_train_scaled, y_train, epochs=100, verbose=0)
# Make predictions and calculate MSE
y_pred_3_2_hidden = model_3_2_hidden.predict(X_test_scaled)
mse_3_2_hidden = mean_squared_error(y_test, y_pred_3_2_hidden)
# Print MSE
print(f'MSE for 3-2 hidden nodes ANN: {mse_3_2_hidden:.2f}')
# Draw the ANN with 3 and 2 hidden nodes
weights_3_2_hidden = [
model_3_2_hidden.layers[0].get_weights(), # Weights from input to first hidden layer
model_3_2_hidden.layers[1].get_weights(), # Weights from first hidden to second hidden layer
model_3_2_hidden.layers[2].get_weights() # Weights from second hidden to output layer
]
draw_ann(weights_3_2_hidden, [4, 3, 2, 1], 'ANN with 3 and 2 Hidden Nodes')
4/4 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step MSE for 3-2 hidden nodes ANN: 229.91
In [ ]: